
* Start from an empty session and disable output paging so the script runs unattended.
clear all
set more off

* Close a log that may still be open (capture suppresses the error when none is),
* then write this script's output to its own log file, overwriting any earlier copy.
capture log close
log using "$log/02_data_prep.log", replace

***************************************
* CLEAN BeH DATA AND CREATE KEY VARIABLES *
***************************************

* Load the BeH records whose year lies in 1998-2018 (both bounds inclusive).
use $BeH_large if inrange(year,1998,2018), clear

* Fix the random-number seed for the session.
set seed 20210505

* Attach train_end and train_end_age to every record of the person (many records per persnr).
merge m:1 persnr using "$temp/train_end", keepusing(train_end train_end_age)

* Retain matched records only. This removes
*   - master-only records: persons for which we find no valid training
*   - using-only records: persons in train_end with no record in the loaded data
keep if _merge == 3
drop _merge

* Harmonise variable names. capture lets the script continue when a rename
* fails (for example because the source variable is not in the data).
capture rename tentgelt wage
capture rename beh_ausbildung bild

* Floor of beh_beruf_num divided by 10.
generate beruf = floor(beh_beruf_num/10)

* Trained: 1 for every record of a person who has at least one record with
* beh_pers_gr equal to 102 or 141, 0 otherwise.
bysort persnr: egen trained = max(inlist(beh_pers_gr,102,141))

* Received certificate: 1 for every record of a person who has at least one
* record with bild equal to 2 or 4, 0 otherwise.
bysort persnr: egen certificate = max(inlist(bild,2,4))

* Restrict the sample to trained persons.
keep if trained == 1

* Discard records that end on or before the person's training end date.
drop if endorig <= train_end

* first = earliest begorig among the person's remaining records whose
* beh_pers_gr is neither 102 nor 141, copied to all of the person's records.
* It is missing for persons without any such record.
bysort persnr: egen first = min(cond(!inlist(beh_pers_gr,102,141), begorig, .))

* Keep persons whose train_end_age lies in 18-28 (inclusive). A missing
* train_end_age fails inrange() and is therefore dropped, exactly as the
* former "drop if train_end_age > 28" did.
drop if !inrange(train_end_age,18,28)

* NOTE(review): an earlier version computed a flag for records with
* begorig < first and its person-level maximum at this point, then dropped
* both without using them. That dead computation has been removed. If it was
* meant to drive a sample restriction, the restriction was never applied and
* needs to be added here deliberately.

* Order records chronologically within person.
sort persnr begorig endorig

* Attach the person-level information stored in train_end_no_drop. No
* keepusing() is given, so every variable of that file that is not already
* in memory is added; values of variables already in memory are kept.
merge m:1 persnr using "$temp/train_end_no_drop"

* Retain only persons present in both the data in memory and train_end_no_drop.
keep if _merge == 3
drop _merge

* Shrink storage types before writing the file.
compress

* Forward slash as path separator: it works on every operating system Stata
* runs on and matches the other paths in this script (the former backslash
* only resolved on Windows).
save "$data/full_with_E_new.dta", replace

* Leave an empty session and close this script's log.
clear
capture log close
